/*
Look at Footnote 24:
http://scholar.harvard.edu/files/melitz/files/cunat_and_melitz_-_2012_-_volatility_labor_market_flexibility_and_the_patt.pdf

Positional arguments: var1 digits1 var2 digits2
var1 and var2 can be SIC, SITC or NAICS variables (e.g. sic, sitc2, naics).
digits1 and digits2 are the number of digits TO CUT from var1 and var2, respectively, before the concordance between the two classifications is built.
E.g. if we want NAICS at 4 digits we should cut 2 digits, so its digits argument is 2 (because NAICS is defined at 6 digits).
SIC and SITC are defined at 5 digits.

Example:
do "~/shared_space/dbaharc/General_Data/CodesConcordances/NAICS-SIC_to_SITC.do" sitc2 1 naics 0
*/

* Positional arguments: var1 digits1 var2 digits2
*   var1, var2       classification variables present in the import files
*   digits1, digits2 number of trailing digits to cut from var1 / var2
local var1 `1'
local digits1 `2'
local var2 `3'
local digits2 `4'

* Fail early with a readable message instead of a syntax error deep in the loop.
if "`var1'" == "" | "`var2'" == "" {
    display as error "usage: do NAICS-SIC_to_SITC.do var1 digits1 var2 digits2"
    exit 198
}
* An omitted digit count means "cut nothing".
if "`digits1'" == "" local digits1 0
if "`digits2'" == "" local digits2 0
confirm integer number `digits1'
confirm integer number `digits2'

clear all
set more off

global pathtemp "/nfs/home/D/dbaharc/shared_space/dbaharc/General_Data/CodesConcordances/"

* Accumulate the yearly files in a Stata-managed temporary file, so concurrent
* runs cannot clobber each other and nothing is left behind if the run aborts.
tempfile accum
save "`accum'", replace emptyok
//local var1 naics /*It can be sic too*/
//local var2 sitc2

* unzipfile extracts into the working directory, so switch there once.
* NOTE: the working directory stays changed after the script finishes.
cd "${pathtemp}conversion_files/"

forvalues y = 0/6 {
    //copy "http://cid.econ.ucdavis.edu/data/sasstata/usiss/imp0`y'.zip" "${pathtemp}conversion_files/imp0`y'.zip", replace
    * replace: tolerate files left over from a previously interrupted run.
    unzipfile "${pathtemp}conversion_files/imp0`y'.zip", replace
    * Only the Stata file is needed; discard the SAS copy shipped in the zip.
    erase "${pathtemp}conversion_files/imp0`y'_con.sas7bdat"

    use "${pathtemp}conversion_files/imp0`y'_con", clear
    keep year hs ccode cvalue `var1' `var2'
    drop if cvalue == 0
    * force: non-numeric codes become missing and are dropped right below.
    destring year `var2' `var1', replace force
    drop if `var1' == . | `var2' == .
    * Truncate each code by cutting the requested number of trailing digits.
    replace `var1' = floor(`var1'/(10^`digits1'))
    replace `var2' = floor(`var2'/(10^`digits2'))
    collapse (sum) cvalue, by(year hs `var1' `var2')
    append using "`accum'"
    save "`accum'", replace
    erase "${pathtemp}conversion_files/imp0`y'_con.dta"
}

* The accumulated data (all years) is still in memory at this point.
* Total value per HS code and classification pair, summed over years.
collapse (sum) cvalue, by(hs `var1' `var2')

* For each HS code keep only the pair with the largest value.
* The extra sort keys make the choice deterministic when values tie.
gsort hs -cvalue `var1' `var2'
duplicates drop hs, force
drop hs

* Total value per classification pair.
collapse (sum) cvalue, by(`var1' `var2')

egen total_`var2'=total(cvalue), by(`var2')
egen total_`var1'=total(cvalue), by(`var1')

* Share of each pair within the total of each of its two codes.
gen share_`var2'=(cvalue/total_`var2')
gen share_`var1'=(cvalue/total_`var1')

*drop total* cvalue
//save "${pathtemp}`var2'_`var1'_concordance.dta", replace

